**************************************************************************************************************************
*                                                    PART I REGRESSION
**************************************************************************************************************************

* Level specifications: for each outcome, an OLS benchmark followed by a 2SLS
* model in which energy1000 is instrumented by gasoline.
* The control list is split around energy1000 so that the regressors enter
* each command in the same order as before.
local before_energy cereal land1000 forest1000
local after_energy iron imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance

* --- Outcome: sum ---
* OLS, energy1000 entered as an ordinary regressor
regress sum `before_energy' energy1000 `after_energy'
estimates store aa

* 2SLS, energy1000 instrumented by gasoline; "first" also prints the first stage
ivreg sum (energy1000=gasoline) `before_energy' `after_energy', first
estimates store bb

* --- Outcome: trans ---
* OLS
regress trans `before_energy' energy1000 `after_energy'
estimates store ccc

* 2SLS with the same instrument
ivreg trans (energy1000=gasoline) `before_energy' `after_energy', first
estimates store dd

* Alternative functional forms: level-log, log-level and log-log.
* log() yields missing for non-positive arguments, so such observations
* drop out of the models that use logsum or logenergy.
gen logsum=log(sum)
gen logenergy=log(energy1000)

* NOTE(review): the IV models in this section leave land1000 out of the
* control list and add iron, forest1000 and cereal to the instrument set,
* unlike the 2SLS models that use only gasoline -- confirm this is intended.

* level-log
* (control spelled imports1000, matching every other specification, instead
*  of relying on the abbreviation "imports")
ivreg sum (logenergy=gasoline iron forest1000 cereal) imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance, first
* "If we increase x by one percent, we expect y to increase by (beta1/100) units of y."


* log-level

reg logsum cereal land1000 forest1000 energy1000 iron imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance
estimates store ee 

ivreg logsum (energy1000=gasoline iron forest1000 cereal) imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance, first
* "If we change x by 1 (unit), we'd expect our y variable to change by 100*beta1 percent."
estimates store ff

* log-log (loses significance)
ivreg logsum (logenergy=gasoline iron forest1000 cereal) imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance, first
* "If we change x by one percent, we'd expect y to change by beta1 percent."


* Side-by-side table of the four level models with R-squared.
* NOTE(review): ee and ff are stored above but not included in this table.
esttab aa bb ccc dd, r2


****************************************************************************************************************************
*                      PART II : The Effect of Region on the Chinese OFDI: Additive and Interaction Models
****************************************************************************************************************************

********************* FIRST WITH SUM AS THE DEPENDENT VARIABLE *********************

* Control list, split around energy1000 to keep the regressor order unchanged
local before_energy cereal land1000 forest1000
local after_energy iron imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance

* Model 1: no region terms
regress sumenergy `before_energy' energy1000 `after_energy'
predict predicted1, xb

twoway scatter predicted1 energy1000, mlabel(country)

* Model 2: region dummies added
regress sumenergy `before_energy' energy1000 `after_energy' ///
    africa carus europe meast nac sa seasia oc
predict predicted2, xb

twoway scatter predicted2 energy1000, mlabel(country)

* Model 3: energy-by-region interactions
* Build energy_<suffix> = energy1000 * <region dummy>; the two lists are
* matched by position and keep the original variable names and creation order.
local suffixes nac africa carus eur me sa seasia oc
local dummies nac africa carus europe meast sa seasia oc
forvalues k = 1/8 {
    local sfx : word `k' of `suffixes'
    local dum : word `k' of `dummies'
    generate energy_`sfx'=energy1000*`dum'
}

* A reference region is needed: nac (North America) and energy_nac are left
* out of the regressor list, so the region terms are relative to it.
regress sumenergy `before_energy' energy1000 `after_energy' ///
    africa carus europe meast sa seasia oc ///
    energy_africa energy_carus energy_eur energy_me energy_sa energy_seasia energy_oc

predict predicted, xb
twoway scatter predicted energy1000, mlabel(country)


********************* THEN WITH TRANS AS THE DEPENDENT VARIABLE *********************

* Control list, split around energy1000 to keep the regressor order unchanged
local before_energy cereal land1000 forest1000
local after_energy iron imports1000 gci2b gci8b gci3 roads ease gci10a gdp_pc inflation gci4b gci1a taxrate timetax newcontract Distance

* Model 1: no region terms
regress transenergy `before_energy' energy1000 `after_energy'
predict predicted3, xb

twoway scatter predicted3 energy1000, mlabel(country)

* Model 2: region dummies added
regress transenergy `before_energy' energy1000 `after_energy' ///
    africa carus europe meast nac sa seasia oc
predict predicted4, xb

twoway scatter predicted4 energy1000, mlabel(country)

* Model 3: energy-by-region interactions
* This block does not create the energy_* variables; they must already be
* present in the data when it runs.
* A reference region is needed: nac (North America) and energy_nac are left
* out of the regressor list, so the region terms are relative to it.
regress transenergy `before_energy' energy1000 `after_energy' ///
    africa carus europe meast sa seasia oc ///
    energy_africa energy_carus energy_eur energy_me energy_sa energy_seasia energy_oc

predict predicted5, xb
twoway scatter predicted5 energy1000, mlabel(country)



***********************************************************************************************************
*                                            PART III SPATIAL MODEL
***********************************************************************************************************
* Now run a spatial model to test that region plays no role in the
* contagion of FDI.

* Look up Pisati's spatial commands so they can be installed
findit spatwmat
set more off

* Import the weights matrix as W (eigenvalues in E); the "standardize"
* option row-standardizes the matrix
spatwmat using "C:\Users\Usuario\Desktop\BACKUP\FRAN\Essex Summer School\Trabajo Chinese OFDI\imatrix96_2", name(W) e(E) standardize

* Load the analysis data, replacing whatever is in memory
use "C:\Users\Usuario\Dropbox\New Paper\BASE Y DO FILE\BASE", clear


**** FIRST WITH SUM AS THE DEPENDENT VARIABLE ********************
local outcome sumenergy

* Local, then global, indicators of spatial association (Moran and Geary)
spatlsa `outcome' , w(W) moran geary id(country) two
spatgsa `outcome' , w(W) moran geary two

* Non-spatial OLS benchmark
regress `outcome' energy1000

* LM tests (original and robust) on the OLS fit.
* Note: the Moran score reported here is the z statistic for the global
* Moran's I on the OLS residuals.
spatdiag, weights(W)


**********************
//need this to run the plot
//
// genmsp varname, weights(matrix) [pvalue(#)]
//
// Builds the variables used for a Moran scatterplot of varname (called Y below):
//   std_Y   Y standardized with the population standard deviation
//   Wstd_Y  weights matrix times std_Y
//   zval_Y  z value taken from the matrix returned by -spatlsa, moran-
//   pval_Y  p-value taken from the same matrix
//   msp_Y   quadrant code (1 Low-Low, 2 Low-High, 3 High-Low, 4 High-High),
//           set only where pval_Y is below pvalue() (default 0.05)
// Existing variables with these names are dropped and recreated.
// NOTE(review): assumes Y has no missing values and that the number of
// observations matches the dimension of the weights matrix -- confirm.

// Drop any earlier definition so the do-file can be rerun in one session
capture program drop genmsp
program genmsp, sortpreserve
version 12.1
syntax varname, Weights(name) [Pvalue(real 0.05)]
unab Y : `varlist'
// Work on a temporary copy of the weights matrix
tempname W
matrix `W' = `weights'
// Standardize Y; Var*(N-1)/N converts the sample variance to the population variance
tempvar Z
qui summarize `Y'
qui generate `Z' = (`Y' - r(mean)) / sqrt( r(Var) * ( (r(N)-1) / r(N) ) )
qui cap drop std_`Y'
qui generate std_`Y' = `Z'
// Multiply the weights matrix by the standardized vector and save it as Wstd_Y
tempname z Wz
qui mkmat `Z', matrix(`z')
matrix `Wz' = `W'*`z'
matrix colnames `Wz' = Wstd_`Y'
qui cap drop Wstd_`Y'
qui svmat `Wz', names(col)
// Local Moran statistics: keep the 4th and 5th columns of r(Moran) as
// zval_Y and pval_Y; the first three are saved as __c1-__c3 and dropped again
qui spatlsa `Y', w(`W') moran
tempname M
matrix `M' = r(Moran)
matrix colnames `M' = __c1 __c2 __c3 zval_`Y' pval_`Y'
qui cap drop __c1 __c2 __c3
qui cap drop zval_`Y'
qui cap drop pval_`Y'
qui svmat `M', names(col)
qui cap drop __c1 __c2 __c3
// Quadrant of each observation, assigned only when pval_Y < pvalue()
qui cap drop msp_`Y'
qui generate msp_`Y' = .
qui replace msp_`Y' = 1 if std_`Y'<0 & Wstd_`Y'<0 & pval_`Y'<`pvalue'
qui replace msp_`Y' = 2 if std_`Y'<0 & Wstd_`Y'>0 & pval_`Y'<`pvalue'
qui replace msp_`Y' = 3 if std_`Y'>0 & Wstd_`Y'<0 & pval_`Y'<`pvalue'
qui replace msp_`Y' = 4 if std_`Y'>0 & Wstd_`Y'>0 & pval_`Y'<`pvalue'
lab def __msp 1 "Low-Low" 2 "Low-High" 3 "High-Low" 4 "High-High", modify
lab val msp_`Y' __msp
end
// The bare -exit- that followed -end- was removed: inside a do-file it ends
// execution, so none of the commands after the program definition would run.
*****************************************************************

* Create std_sumenergy, Wstd_sumenergy, pval_sumenergy (and related variables)
genmsp sumenergy, w(W)


* Moran scatterplot for sumenergy: country labels only (invisible markers),
* default colour where pval_sumenergy >= 0.05 and red where it is < 0.05,
* plus a linear fit of Wstd_sumenergy on std_sumenergy.
* The lfit variables are written out in full so the fitted line is drawn from
* the same variables as the scatter and does not rely on variable abbreviation.
graph twoway ///
	(scatter Wstd_sumenergy  std_sumenergy  ///
		if pval_sumenergy  >= 0.05, ///
		msymbol(i) mlabel(country) mlabsize(*0.6) mlabpos(c)) ///
	(scatter Wstd_sumenergy  std_sumenergy  ///
		if pval_sumenergy < 0.05, ///
		msymbol(i) mlabel(country) mlabsize(*0.6) mlabpos(c) mlabcol(red)) ///
	(lfit Wstd_sumenergy std_sumenergy), yline(0, lpattern(--)) xline(0, lpattern(--)) ///
	xlabel(-3(1)3, labsize(*0.8)) ///
	xtitle("{it:z}") ///
	ylabel(-3(1)3, angle(0) labsize(*0.8)) ///
	ytitle("{it:Wz}") ///
	legend(off) scheme(s1color) 

	
		
*************************** Model estimation ***************************

* Non-spatial OLS benchmark, followed by the spatial diagnostics against W
regress sumenergy energy1000
spatdiag, weights(W)


* Spatial autoregressive (SAR) models: the lag model first, then the error model
foreach form in lag error {
    spatreg sumenergy energy1000, r weights(W) eigenval(E) model(`form')
}


**** NOW WITH TRANS AS THE DEPENDENT VARIABLE / THE DATA MUST BE RELOADED ****

* Import the weights matrix again as W (eigenvalues in E), row-standardized
spatwmat using "C:\Users\Usuario\Desktop\BACKUP\FRAN\Essex Summer School\Trabajo Chinese OFDI\imatrix96_2", name(W) e(E) standardize

* Reload the analysis data, replacing whatever is in memory
use "C:\Users\Usuario\Dropbox\New Paper\BASE Y DO FILE\BASE", clear


local outcome transenergy

* Local, then global, indicators of spatial association (Moran and Geary)
spatlsa `outcome' , w(W) moran geary id(country) two
spatgsa `outcome' , w(W) moran geary two

* Non-spatial OLS benchmark
regress `outcome' energy1000


* LM tests (original and robust) on the OLS fit.
* Note: the Moran score reported here is the z statistic for the global
* Moran's I on the OLS residuals.
spatdiag, weights(W)


**********************
//need this to run the plot
//
// genmsp varname, weights(matrix) [pvalue(#)]
//
// Builds the variables used for a Moran scatterplot of varname (called Y below):
//   std_Y   Y standardized with the population standard deviation
//   Wstd_Y  weights matrix times std_Y
//   zval_Y  z value taken from the matrix returned by -spatlsa, moran-
//   pval_Y  p-value taken from the same matrix
//   msp_Y   quadrant code (1 Low-Low, 2 Low-High, 3 High-Low, 4 High-High),
//           set only where pval_Y is below pvalue() (default 0.05)
// Existing variables with these names are dropped and recreated.
// NOTE(review): assumes Y has no missing values and that the number of
// observations matches the dimension of the weights matrix -- confirm.

// Defining a program whose name is already in memory is an error, so drop
// any existing genmsp before defining it here
capture program drop genmsp
program genmsp, sortpreserve
version 12.1
syntax varname, Weights(name) [Pvalue(real 0.05)]
unab Y : `varlist'
// Work on a temporary copy of the weights matrix
tempname W
matrix `W' = `weights'
// Standardize Y; Var*(N-1)/N converts the sample variance to the population variance
tempvar Z
qui summarize `Y'
qui generate `Z' = (`Y' - r(mean)) / sqrt( r(Var) * ( (r(N)-1) / r(N) ) )
qui cap drop std_`Y'
qui generate std_`Y' = `Z'
// Multiply the weights matrix by the standardized vector and save it as Wstd_Y
tempname z Wz
qui mkmat `Z', matrix(`z')
matrix `Wz' = `W'*`z'
matrix colnames `Wz' = Wstd_`Y'
qui cap drop Wstd_`Y'
qui svmat `Wz', names(col)
// Local Moran statistics: keep the 4th and 5th columns of r(Moran) as
// zval_Y and pval_Y; the first three are saved as __c1-__c3 and dropped again
qui spatlsa `Y', w(`W') moran
tempname M
matrix `M' = r(Moran)
matrix colnames `M' = __c1 __c2 __c3 zval_`Y' pval_`Y'
qui cap drop __c1 __c2 __c3
qui cap drop zval_`Y'
qui cap drop pval_`Y'
qui svmat `M', names(col)
qui cap drop __c1 __c2 __c3
// Quadrant of each observation, assigned only when pval_Y < pvalue()
qui cap drop msp_`Y'
qui generate msp_`Y' = .
qui replace msp_`Y' = 1 if std_`Y'<0 & Wstd_`Y'<0 & pval_`Y'<`pvalue'
qui replace msp_`Y' = 2 if std_`Y'<0 & Wstd_`Y'>0 & pval_`Y'<`pvalue'
qui replace msp_`Y' = 3 if std_`Y'>0 & Wstd_`Y'<0 & pval_`Y'<`pvalue'
qui replace msp_`Y' = 4 if std_`Y'>0 & Wstd_`Y'>0 & pval_`Y'<`pvalue'
lab def __msp 1 "Low-Low" 2 "Low-High" 3 "High-Low" 4 "High-High", modify
lab val msp_`Y' __msp
end
// The bare -exit- that followed -end- was removed: inside a do-file it ends
// execution, so the genmsp call and the plot after it would never run.
*****************************************************************

* Create std_transenergy, Wstd_transenergy, pval_transenergy (and related variables)
genmsp transenergy, w(W)


* Moran scatterplot for transenergy: country labels only (invisible markers),
* default colour where pval_transenergy >= 0.05 and red where it is < 0.05,
* plus a linear fit of Wstd_transenergy on std_transenergy.
* The fitted line previously referred to Wstd_sum / std_sum, i.e. the
* sumenergy variables, which genmsp does not create for this outcome.
graph twoway ///
	(scatter Wstd_transenergy  std_transenergy  ///
		if pval_transenergy  >= 0.05, ///
		msymbol(i) mlabel(country) mlabsize(*0.6) mlabpos(c)) ///
	(scatter Wstd_transenergy  std_transenergy  ///
		if pval_transenergy < 0.05, ///
		msymbol(i) mlabel(country) mlabsize(*0.6) mlabpos(c) mlabcol(red)) ///
	(lfit Wstd_transenergy std_transenergy), yline(0, lpattern(--)) xline(0, lpattern(--)) ///
	xlabel(-3(1)3, labsize(*0.8)) ///
	xtitle("{it:z}") ///
	ylabel(-3(1)3, angle(0) labsize(*0.8)) ///
	ytitle("{it:Wz}") ///
	legend(off) scheme(s1color) 
